﻿using NPOI.SS.UserModel;
using System.Data;
using System.Text.RegularExpressions;

namespace Singer.Extensions.Excel;

public static partial class ExcelTools
{
    /// <summary>
    /// 读取Excel到DataSet （从本地路径读取）
    /// </summary>
    /// <param name="filePath">本地excel文件路径</param>
    /// <param name="sheetName">表格名,为空时读取全部表格，每个sheet对应一个DataTable</param>
    /// <returns>ds读取的全部sheet的DataTable集合DataSet  titleRowNumbers 全部sheet的标题行行号集合</returns>
    public static (DataSet? ds, List<int> titleRowNumbers) ReadExcelToDataSet(string filePath, string? sheetName = null)
    {
        if (!File.Exists(filePath)) return (null, new List<int>());
        using FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read);
        return ReadExcelToDataSet(fs, sheetName);
    }

    /// <summary>
    /// 读取Excel到DataSet （从文件流中读取）
    /// </summary>
    /// <param name="fs">excel文件流</param>
    /// <param name="sheetName">表格名,为空时读取全部表格，每个sheet对应一个DataTable</param>
    /// <returns>ds读取的全部sheet的DataTable集合DataSet  titleRowNumbers 全部sheet的标题行行号集合</returns>
    public static (DataSet? ds, List<int> titleRowNumbers) ReadExcelToDataSet(FileStream fs, string? sheetName = null)
    {
        List<int> titleRowNumbers = new List<int>();
        IWorkbook workbook = WorkbookFactory.Create(fs);
        //获取sheet信息
        ISheet? sheet = null;
        DataSet ds = new DataSet();
        if (!string.IsNullOrWhiteSpace(sheetName))
        {
            sheet = workbook.GetSheet(sheetName);
            if (sheet == null) return (null, titleRowNumbers);
            (DataTable? dt, int titleRowNumber) = ReadSheetToDataTable(sheet);
            if (dt != null)
            {
                ds.Tables.Add(dt);
                titleRowNumbers.Add(titleRowNumber);
            }
        }
        else
        {
            //遍历获取所有数据
            int sheetCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetCount; i++)
            {
                sheet = workbook.GetSheetAt(i);
                if (sheet != null)
                {
                    (DataTable? dt, int titleRowNumber) = ReadSheetToDataTable(sheet);
                    if (dt != null)
                    {
                        ds.Tables.Add(dt);
                        titleRowNumbers.Add(titleRowNumber);
                    }
                }
            }
        }
        return (ds, titleRowNumbers);
    }

    /// <summary>
    /// 读取sheet信息到DataTable
    /// </summary>
    /// <returns>dt表格数据，titleRowNumber表格标题行的行号</returns>
    private static (DataTable? dt, int titleRowNumber) ReadSheetToDataTable(ISheet sheet)
    {
        DataTable dt = new DataTable(sheet.SheetName);
        //获取列信息
        IRow cells = sheet.GetRow(sheet.FirstRowNum);
        //空数据化返回
        if (cells == null) return (null, 0);
        int cellsCount = cells.PhysicalNumberOfCells;
        //空列返回
        if (cellsCount == 0) return (null, 0);
        int emptyCount = 0;
        int titleRowIndex = sheet.FirstRowNum;
        List<string> listColumns = new List<string>();
        while (true)
        {
            emptyCount = 0;
            listColumns.Clear();
            for (int i = 0; i < cellsCount; i++)
            {
                if (string.IsNullOrEmpty(cells.GetCell(i).StringCellValue))
                    emptyCount++;
                listColumns.Add(cells.GetCell(i).StringCellValue);
            }
            //这里根据逻辑需要，空列超过多少判断
            if (emptyCount == 0) break;
            titleRowIndex++;
            cells = sheet.GetRow(titleRowIndex);
        }

        foreach (string columnName in listColumns)
        {
            if (dt.Columns.Contains(columnName))
            {
                //如果允许有重复列名，自己做处理
                continue;
            }
            dt.Columns.Add(columnName, typeof(string));
        }
        //开始获取数据
        int rowsCount = sheet.LastRowNum + 1;//最后一行的索引+1 就是总行数
        DataRow dr = null;
        //titleRowIndex + 1  (标题行索引 + 1 = 数据行索引)  遍历数据行获取数据
        for (int i = titleRowIndex + 1; i < rowsCount; i++)
        {
            cells = sheet.GetRow(i);
            dr = dt.NewRow();
            for (int j = 0; j < dt.Columns.Count; j++)
            {
                if (cells?.GetCell(j) == null)
                {
                    dr[j] = null;
                    continue;
                }
                //这里可以判断数据类型
                switch (cells.GetCell(j).CellType)
                {
                    case CellType.String:
                        dr[j] = cells.GetCell(j).StringCellValue;
                        break;
                    case CellType.Numeric:
                        dr[j] = cells.GetCell(j).NumericCellValue.ToString();
                        break;
                    case CellType.Unknown:
                        dr[j] = cells.GetCell(j).StringCellValue;
                        break;
                }
            }
            dt.Rows.Add(dr);
        }
        return (dt, titleRowIndex + 1);
    }

    /// <summary>
    /// 将.csv文件读入到DataTable
    /// </summary>
    /// <param name="filePath">csv文件路径</param>
    /// <param name="titleRowIndex">标题行索引，默认0</param>
    public static DataTable ReadCsvToDataTable(string filePath, int titleRowIndex = 0)
    {
        DataTable dt = new DataTable();
        String csvSplitBy = "(?<=^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)";
        using StreamReader reader = new StreamReader(filePath, System.Text.Encoding.Default, false);
        int i = 0, m = 0;
        reader.Peek();
        while (reader.Peek() > 0)
        {
            m = m + 1;
            string str = reader.ReadLine();
            if (m >= titleRowIndex + 1)
            {
                if (m == titleRowIndex + 1) //如果是字段行，则自动加入字段。
                {
                    MatchCollection mcs = Regex.Matches(str, csvSplitBy);
                    foreach (Match mc in mcs)
                    {
                        dt.Columns.Add(mc.Value); //增加列标题
                    }
                }
                else
                {
                    MatchCollection mcs = Regex.Matches(str, "(?<=^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)");
                    i = 0;
                    System.Data.DataRow dr = dt.NewRow();
                    foreach (Match mc in mcs)
                    {
                        dr[i] = mc.Value;
                        i++;
                    }
                    dt.Rows.Add(dr);  //DataTable 增加一行     
                }
            }
        }
        return dt;
    }
}
